############################################### File notes #######################################

############################### Clean workspace and define settings ##############################


### Run script to set up environment
rm(list=ls())


### Load packages if not already installed
install.packages("K:/BE/1261/Cases/Walgreens_Rite Aid_1610026/Research/Brian/Code/Code/R Packages/lfe_2.5-1998.zip", repos = NULL, type = "win.binary")
install.packages("K:/BE/1261/Cases/Walgreens_Rite Aid_1610026/Research/Brian/Code/R Packages/purrr_0.2.4.zip", repos = NULL, type = "win.binary")
install.packages("K:/BE/1261/Cases/Walgreens_Rite Aid_1610026/Research/Brian/Code/R Packages/tidyselect_0.2.3.zip", repos = NULL, type = "win.binary")
install.packages("K:/BE/1261/Cases/Walgreens_Rite Aid_1610026/Research/Brian/Code/R Packages/tidyr_0.7.2.zip", repos = NULL, type = "win.binary")
library(openxlsx)
library(foreign)
library(Matrix)
library(data.table)
library(sp)
library(maps)
library(maptools)
library(ggplot2)
library(xtable)
library(lattice)
library(survival)
library(Formula)
library(Hmisc)
library(doBy)
library(readr)
library(tools)
library(tidyr)
library(readstata13)

dataIn <- file.path("K:","BE","1257","Projects","Medicare Claims","HospRetro","Drafts","Health_outcomes","FigureCreation","PSPatients","RefereeResponse","MakeTables")

#the filepath called "dataIn" had these .dta files in it (this is important for the code below):
# "death_fe_small.dta"          "death_lim_small.dta"         "diab_fe_small.dta"           "Diab_lim_small.dta"          "femAllBlocks.dta"            "fembetaBlockDiab.dta"        "fembetaDeath.dta"           
# "fembetaDiab.dta"             "fembetaDiab_overall.dta"     "fembetaHyp.dta"              "fembetaHyp_overall.dta"      "femblockDeath_overall.dta"   "femDiabBlocks.dta"           "femHypBlocks.dta"           
# "hyp_fe_small.dta"            "Hyp_lim.dta"                 "ll_death_small.dta"          "ll_diab_small.dta"           "ll_hyp_small.dta"            "maleAllBlocks.dta"           "malebetaBlockDeath.dta"     
# "malebetaBlockHyp.dta"        "malebetaDeath.dta"           "malebetaDiab.dta"            "malebetaDiab_overall.dta"    "malebetaHyp.dta"             "malebetaHyp_overall.dta"     "maleblockDeath_overall.dta" 
# "maleDiabBlocks.dta"          "maleHypBlocks.dta"           "mbm_death_small.dta"         "mbm_diab_small.dta"          "mbm_hyp_small.dta"           "overall_death_zip_small.dta" "Overall_diab_zip_small.dta" 
# "Overall_hyp_zip_small.dta"   "PS_YMeans_1_fillin.dta"      "PS_YMeans_2_fillin.dta"      "YMeans_fillin1.dta"          "YMeans_fillin2.dta"


############################################### create functions for later use ################ 
options(scipen = 999)
#Readin function
fn.readin <- function(x){
   z <- read.dta13(paste0(dataIn,"/",x)) #dta. files
   z$dataname <- as.character(x) #Use the file name to determine sex results 
   z$sex <- as.character(x) #Use the file name to determine sex results 
   z$sex <- gsub(".dta","",gsub("_overall.dta","",z$sex)) 
   z$cond  <- gsub("fembeta","",gsub("malebeta","",z$sex)) #Use the file name to determine outcomes 
   z$sex  <- gsub("betahyp","",gsub("betadiab","",z$sex))
   z$cond[z$cond=="hyp"]  <- "Hypertension"
   z$cond[z$cond=="diab"]  <- "Diabetes"
   z
   return(z)
 }

#MATCHING FUNCTIONS
 #Limit  
 fn.limmat <- function(x){
   names(x)[grepl("rndiabcomp1_",names(x))] <- "rn"  
   names(x)[grepl("rnami_",names(x))] <- "rn"  
   x <- x[grepl("postmerger",x$rn ),]
   x <- x[,names(x)[!grepl("rn",names(x)) & !grepl("dataname",names(x))]]
   name <- names(x)[grepl("sesq",names(x)) | grepl("beta",names(x)) | grepl("mem",names(x)) | grepl("mef",names(x)) ]
   x <- x[,name]
   return(x)
 }   

 fn.obcount <- function(x){
   for(i in names(x)[grepl("rn",names(x))] )  {
     x$flag[x[,c(i)]=="" ]  <- 1
   }
   for(i in names(x)[grepl("beta",names(x))] )  {
     x[is.na(x[,c(i)]),c(i)]  <- 0
   }
   x <- x[x$flag ==1,]
   for(i in names(x)[grepl("beta",names(x))] )  {
     x  <- x[!is.na(x[,c(i)]),]
     x[,c(i)]  <- max(x[,c(i)])
   }
   x <- x[ ,names(x)[grepl("beta",names(x)) ] ]
   x <- x[!duplicated(x) ,]
   z <- names(x)
   x <- reshape(data=x, direction = "long", timevar = "Outcome",v.names = "N", idvar = row.names(x), varying = list(names(x)) )
   x$Outcome <- z
   names(x) <- c("Outcome","N","id")
   

   x$Cond[grepl("_3",x$Outcome)] <- "Diabetes"
   x$Cond[grepl("_2",x$Outcome)] <- "Hypertension"
   x$Cond[grepl("_1",x$Outcome)] <- "All"
   
   x$Outcome[grepl("diabcomp1",x$Outcome)] <- "Asymptomatic"
   x$Outcome[grepl("diabcomp2",x$Outcome)] <- "Symptomatic"
   x$Outcome[grepl("glaucoma",x$Outcome)] <- "Glaucoma"
   
   x$Outcome[grepl("ihd",x$Outcome)] <- "Ischemic heart"
   x$Outcome[grepl("ami",x$Outcome)] <- "AMI"
   x$Outcome[grepl("acuteicd8",x$Outcome)] <- "Acute cardiac"
   
   x$Outcome[grepl("death",x$Outcome)] <- "Mortality"

   x$z1 <- x$Cond
   x$z2 <- x$Outcome
   x$Outcome[x$z2 == "Mortality"] <- x$z1[x$z2 == "Mortality"]
   x$Cond[x$z2 == "Mortality"] <- "Mortality"
   x <- x[,names(x)[names(x)!="z1" & names(x)!="z2"]]
   x <- x[!grepl("beta",x$Outcome),]
   x <- x[!(grepl("All",x$Cond) & grepl("AMI",x$Outcome)),]
   
   return(x)
   
 }   
 
 fn.rsq <- function(x){
   for(i in names(x)[grepl("rn",names(x))] )  {
     x$flag[x[,c(i)]=="rsq" ]  <- 1
   }
   x <- x[x$flag ==1 & !is.na(x$flag),]
   
   for(i in names(x)[grepl("rn",names(x))] )  {
     j <- paste0("beta_",gsub("rn","",i))
     x[,c(j)]  <- abs(x[,c(j)])
     x[,c(j)]  <- x[x[,c(i)]=="rsq" ,c(j)] 
   }
   
      
   x <- x[ ,names(x)[grepl("beta",names(x)) ] ]
   x <- x[!duplicated(x) ,]
   z <- names(x)
   x <- reshape(data=x, direction = "long", timevar = "Outcome",v.names = "N", idvar = row.names(x), varying = list(names(x)) )
   x$Outcome <- z
   names(x) <- c("Outcome","Rsq","id")
   
   
   x$Cond[grepl("_3",x$Outcome)] <- "Diabetes"
   x$Cond[grepl("_2",x$Outcome)] <- "Hypertension"
   x$Cond[grepl("_1",x$Outcome)] <- "All"
   
   x$Outcome[grepl("diabcomp1",x$Outcome)] <- "Asymptomatic"
   x$Outcome[grepl("diabcomp2",x$Outcome)] <- "Symptomatic"
   x$Outcome[grepl("glaucoma",x$Outcome)] <- "Glaucoma"
   
   x$Outcome[grepl("ihd",x$Outcome)] <- "Ischemic heart"
   x$Outcome[grepl("ami",x$Outcome)] <- "AMI"
   x$Outcome[grepl("acuteicd8",x$Outcome)] <- "Acute cardiac"
   
   x$Outcome[grepl("death",x$Outcome)] <- "Mortality"
   
   x$z1 <- x$Cond
   x$z2 <- x$Outcome
   x$Outcome[x$z2 == "Mortality"] <- x$z1[x$z2 == "Mortality"]
   x$Cond[x$z2 == "Mortality"] <- "Mortality"
   x <- x[,names(x)[names(x)!="z1" & names(x)!="z2"]]
   x <- x[!grepl("beta",x$Outcome),]
   x <- x[!(grepl("All",x$Cond) & grepl("AMI",x$Outcome)),]
   
   return(x)
   
 }   
 
 
  
 fn.mef <- function(x) {
   x <- x[,names(x)[grepl("mef",names(x))]]
   x <- x[!is.na(x[,1]),]
   name <- names(x)
   x$id <- row.names(x) 
   x <- reshape(data=x, direction = "long", timevar = "Outcome",v.names = "te.Women", idvar = "id", varying = list(names(x)[!grepl("id",names(x))]))
   x <- x[order(x$id,x$Outcome),]
   x$Outcome <- c(name,name)
   x$Outcome <- gsub("mef_","",x$Outcome)  
   x$Cond[grepl("_2", x$Outcome )] <-"Hypertension"  
   x$Cond[grepl("_3", x$Outcome )] <-"Diabetes"  
   x$te.Women[x$id==2] <- sqrt(x$te.Women[x$id==2])

   return(x)
 }

 fn.mem <- function(x) {
   x <- x[,names(x)[grepl("mem",names(x))]]
   x <- x[!is.na(x[,1]),]
   name <- names(x)
   x$id <- row.names(x) 
   x <- reshape(data=x, direction = "long", timevar = "Outcome",v.names = "te.Men", idvar = "id", varying = list(names(x)[!grepl("id",names(x))]))
   x <- x[order(x$id,x$Outcome),]
   x$Outcome <- c(name,name)
   x$Outcome <- gsub("mem_","",x$Outcome)  
   x$Cond[grepl("_2", x$Outcome )] <-"Hypertension"  
   x$Cond[grepl("_3", x$Outcome )] <-"Diabetes"  
   x$te.Men[x$id==2] <- sqrt(x$te.Men[x$id==2])
   return(x)
 }

 fn.beta <- function(x) {
   x <- x[,names(x)[grepl("beta",names(x))]]
   x <- x[x[,1] !=0,]
   name <- names(x)
   x$sex <- row.names(x) 
   x <- reshape(data=x, direction = "long", idvar = "sex",timevar = "Outcome",v.names = "te", varying = list(names(x)[!grepl("sex",names(x))]))
   x <- x[order(x$sex,x$Outcome),]
   x$Outcome <- c(name,name)
   x$sex[x$sex==3] <- "Men"
   x$sex[x$sex==4] <- "Women"
   x <- reshape(data=x, direction = "wide", timevar = "sex" , idvar = "Outcome")
   x$id <- "1"
   return(x)
 }
 
 fn.sesq <- function(x) {
   x <- x[,names(x)[grepl("sesq",names(x))]]
   x <- x[x[,1] !=0,]
   name <- names(x)
   x$sex <- row.names(x) 
   x <- reshape(data=x, direction = "long", timevar = "Outcome",v.names = "te", idvar = "sex", varying = list(names(x)[!grepl("sex",names(x))]))
   x <- x[order(x$sex,x$Outcome),]
   x$Outcome <- c(name,name)
   x$sex[x$sex==3] <- "Men"
   x$sex[x$sex==4] <- "Women"
   x$te <- sqrt(x$te)
   x <- reshape(data=x, direction = "wide", timevar = "sex" , idvar = "Outcome")
   x$id <- "2"
   
   return(x)
 }
 
 fn.matfmt <- function(x) {
   x$Cond[grepl("_3",x$Outcome)] <- "Diabetes"
   x$Cond[grepl("_2",x$Outcome)] <- "Hypertension"
   x$Cond[grepl("_1",x$Outcome)] <- "All"
   
   x$Outcome[grepl("diabcomp1",x$Outcome)] <- "Asymptomatic"
   x$Outcome[grepl("diabcomp2",x$Outcome)] <- "Symptomatic"
   x$Outcome[grepl("glaucoma",x$Outcome)] <- "Glaucoma"
   
   x$Outcome[grepl("ihd",x$Outcome)] <- "Ischemic heart"
   x$Outcome[grepl("ami",x$Outcome)] <- "AMI"
   x$Outcome[grepl("acuteicd8",x$Outcome)] <- "Acute cardiac"

   x$Outcome[grepl("death",x$Outcome)] <- "Mortality"
   
   z <- round(x[,names(x)[ grepl("Women",names(x)) | grepl("Men",names(x))]],4)
   x<- x[,names(x)[!grepl("te",names(x))]]
   x <- cbind(x ,z)
   x <- x[order(x$Cond,x$Outcome),]

   x <- x[ !grepl("beta",x$Outcome),]
   x <- x[ !grepl("sesq",x$Outcome),]
   x <- x[ !grepl("_",x$Outcome),]
   
   x <- x[!(x$Outcome=="AMI" & x$Cond=="All"),]
   
   x$z1 <- x$Cond
   x$z2 <- x$Outcome
   x$Outcome[x$z2 == "Mortality"] <- x$z1[x$z2 == "Mortality"]
   x$Cond[x$z2 == "Mortality"] <- "Mortality"
   x <- x[,names(x)[names(x)!="z1" & names(x)!="z2"]]
   x$z1 <- x$Cond
   x$z2 <- x$Outcome
   x <- x[,names(x)[names(x)!="z1" & names(x)!="z2"]]

   x <- x[order(x$Cond,x$Outcome,x$id),]
   x$m.se[x$id==1] <- x$te.Men[x$id==2]
     x$m.se[x$id==2] <- 0
   x$w.se[x$id==1] <- x$te.Women[x$id==2]
     x$w.se[x$id==2] <- 0
   
   x$te.Men[ x$id==1 & abs(x$te.Men)>=2*x$m.se ] <- paste0(x$te.Men[ x$id==1 & abs(x$te.Men)>=2*x$m.se ],"*")
   x$te.Women[ x$id==1 & abs(x$te.Women)>=2*x$w.se] <- paste0(x$te.Women[ x$id==1 & abs(x$te.Women)>=2*x$w.se],"*")

   x$te.Men[ x$id==2 ] <- paste0("(",x$te.Men[ x$id==2 ],")")
   x$te.Women[ x$id==2 ] <- paste0("(",x$te.Women[ x$id==2 ],")")
   x$ov[x$Cond=="Mortality"] <- 1
   x$ov[x$Cond=="Diabetes"] <- 2
   x$ov[x$Cond=="Hypertension"] <- 3
   x <- x[order(x$ov,x$Outcome),]
   x$m.se <- NULL
   x$w.se <- NULL
   return(x) 
}

 ############################################### start executing functions  ################ 
 
 
 #Read in the files and clean them up
 #i keep the files labeled "death", only, because the file was created last and therefore has all of the results (ie., hypertension, death, diabetes) in it.
 files_match <- dir(dataIn)[(grepl("_lim",dir(dataIn)) | grepl("_zip",dir(dataIn)) | grepl("_fe",dir(dataIn))) & grepl("death",dir(dataIn))  ]
 
 match.data <- lapply(files_match , fn.readin )
 obs.count <- lapply(match.data,fn.obcount)
 rsq <- lapply(match.data,fn.rsq)
 
 match.data <- lapply(match.data,fn.limmat) #limit the ols data 
  match.beta <- lapply(match.data,fn.beta)
  match.sesq <- lapply(match.data,fn.sesq)
  match.mef <- lapply(match.data[2:3],fn.mef)
  match.mem <- lapply(match.data[2:3],fn.mem)
  
match.me <- lapply(1:length(match.mef), function(x) merge(match.mef[[x]],match.mem[[x]], by = intersect(names(match.mem[[x]]),names(match.mef[[x]]) ) , all= TRUE ))  

match.coeff <- lapply(1:length(match.beta), function(x) rbind(match.beta[[x]],match.sesq[[x]]))  

match.me <- lapply(match.me, fn.matfmt)
match.me <- lapply(match.me,function(x) {names(x)[grepl("te",names(x))] <- gsub("te","me",names(x)[grepl("te",names(x))] )
   return(x) })

match.coeff <- lapply(match.coeff, fn.matfmt)
match.coeff <- lapply(match.coeff,function(x) {names(x)[grepl("te",names(x))] <- gsub("te","coeff",names(x)[grepl("te",names(x))] )
  return(x) })

#Table 1
mat.coef <- match.coeff[2:3]
obscount <- obs.count[2:3]
matchl <- lapply(1:length(mat.coef),function(x) merge(match.me[[x]],mat.coef[[x]], by=intersect(names(match.me[[x]]),names(mat.coef[[x]])), all= TRUE  ) )
matchl <- lapply(1:length(matchl),function(x) merge(matchl[[x]],obscount[[x]], by=intersect(names(matchl[[x]]),names(obscount[[x]])), all= TRUE  ) )
matchl <- lapply(1:length(matchl),function(x) { names(matchl[[x]])[grepl("coeff",names(matchl[[x]])) | grepl("N",names(matchl[[x]])) | grepl("me.",names(matchl[[x]]))] <-
  paste0(names(matchl[[x]])[grepl("coeff",names(matchl[[x]])) | grepl("N",names(matchl[[x]])) | grepl("me.",names(matchl[[x]]))],".",x )  
   return(matchl[[x]])  })

mprint <- merge(matchl[[1]],matchl[[2]], by = intersect(names(matchl[[1]]),names(matchl[[2]])), all = TRUE)
mprint$Cond[mprint$Cond=="Mortality"] <- mprint$Outcome[mprint$Cond=="Mortality"]
mprint$Outcome[mprint$Cond==mprint$Outcome] <- "Mortality"
mprint <- mprint[order(mprint$Cond,mprint$ov,mprint$Outcome,mprint$id),]

obs <- unique(mprint$Cond)

fprint <-mprint[,c("Outcome","me.Men.1","coeff.Men.1","me.Women.1","coeff.Women.1","N.1","me.Men.2","coeff.Men.2","me.Women.2","coeff.Women.2","N.2")]
fprint$Outcome[is.na(fprint$N.2)] <- ""
fprint$N.1 <- as.character(round(fprint$N.1,0))
fprint$N.2 <- as.character(round(fprint$N.2,0))
fprint <-fprint[,c("Outcome","me.Men.1","me.Women.1","coeff.Men.1","coeff.Women.1","me.Men.2","me.Women.2","coeff.Men.2","coeff.Women.2")]

addtorow <- list()
addtorow$pos <- list(0,0,0,1,2,10)
addtorow$command <- c( "& \\multicolumn{4}{c}{\\textbf{Age and quarter only}} & \\multicolumn{4}{c}{\\textbf{Full set of controls}}\\\\\n", 
                       "& \\multicolumn{2}{c}{\\textit{Marginal Effect}} &  \\multicolumn{2}{c}{\\textit{Coefficient}}  & \\multicolumn{2}{c}{\\textit{Marginal Effect}} &  \\multicolumn{2}{c}{\\textit{Coefficient}}\\\\\n", 
                       paste0(paste(gsub("me\\.","",gsub("coeff\\.","",gsub(".[0-9]+","",names(fprint)))),collapse = " & "),"\\\\\n"), 
                       paste0("& \\multicolumn{",length(fprint)-1,"}{c}{",obs[1],"} \\\\\n") , 
                       paste0("& \\multicolumn{",length(fprint)-1,"}{c}{",obs[2],"} \\\\\n") , 
                       paste0("& \\multicolumn{",length(fprint)-1,"}{c}{",obs[3],"} \\\\\n")) 
print(xtable(fprint),add.to.row = addtorow,include.colnames = FALSE,include.rownames = FALSE, file=file.path(dataIn,paste0("Table_Spec.tex")))


#Table 2
obscount <- obs.count
matchcl <- lapply(1:length(match.coeff),function(x) merge(match.coeff[[x]],rsq[[x]], by=intersect(names(match.coeff[[x]]),names(rsq[[x]])), all= TRUE  ) )
matchcl <- lapply(1:length(matchcl),function(x) { names(matchcl[[x]])[grepl("coeff",names(matchcl[[x]])) | grepl("Rsq",names(matchcl[[x]])) | grepl("me.",names(matchcl[[x]]))] <-
  paste0(names(matchcl[[x]])[grepl("coeff",names(matchcl[[x]])) | grepl("Rsq",names(matchcl[[x]])) | grepl("me.",names(matchcl[[x]]))],".",x )  
return(matchcl[[x]])  })

fn.mrg <- function (x,y) merge(x,y,by=intersect(names(x),names(y)), all = TRUE)
ttwo <- Reduce(fn.mrg,matchcl)
ttwo$Cond[ttwo$Cond=="Mortality"] <- ttwo$Outcome[ttwo$Cond=="Mortality"]
ttwo$Outcome[ttwo$Cond==ttwo$Outcome] <- "Mortality"
ttwo <- ttwo[order(ttwo$Cond,ttwo$ov,ttwo$Outcome,ttwo$id),]
ttwo$Outcome[ttwo$id==2] <- ""
ttwo <- ttwo[,c("Outcome","coeff.Women.1","coeff.Men.1","Rsq.1","coeff.Women.3","coeff.Men.3","Rsq.3","coeff.Women.2","coeff.Men.2","Rsq.2")]

ttwo$Rsq.1 <-  as.character(round(ttwo$Rsq.1,4))
ttwo$Rsq.2 <-   as.character( round(ttwo$Rsq.2,4))
ttwo$Rsq.3 <-    as.character(round(ttwo$Rsq.3,4))


addtorow <- list()
addtorow$pos <- list(0,0,1,2,10)
addtorow$command <- c( "& \\multicolumn{3}{c}{\\textbf{Matched pair fixed-effects}} &   \\multicolumn{3}{c}{\\textbf{Full set of controls}} & \\multicolumn{3}{c}{\\textbf{Age and quarter only}} \\\\\n", 
                        paste0(paste(gsub("me\\.","",gsub("coeff\\.","",gsub(".[0-9]+","",names(ttwo)))),collapse = " & "),"\\\\\n"), 
                       paste0("& \\multicolumn{",length(ttwo)-1,"}{c}{",obs[1],"} \\\\\n") , 
                       paste0("& \\multicolumn{",length(ttwo)-1,"}{c}{",obs[2],"} \\\\\n") , 
                       paste0("& \\multicolumn{",length(ttwo)-1,"}{c}{",obs[3],"} \\\\\n")) 
print(xtable(ttwo),add.to.row = addtorow,include.colnames = FALSE,include.rownames = FALSE, file=file.path(dataIn,paste0("Table_FECompare.tex")))



